comp_reg_dt.rmd)
usage contains inf. usage <= 40 is used to filter out such observations, but is it correct? Why usage <= 40 is selected although there are some observations whose usage is more than 40?# /*===== load packages =====*/
source("./GitControlled/Codes/0_1_ls_packages.R")
# /*===== load data =====*/
# /*---- newly updated data ----*/
reg_data <- readRDS("./Shared/Data/WaterAnalysis/comp_reg_dt.rds")
# /*---- newly updated aggregated data ()----*/
reg_agg_data <- readRDS("./Shared/Data/WaterAnalysis/agg_comp_reg_dt.rds")
# /*---- old data ----*/
# var_ls_old <- c('precip_in', 'tmin_in', 'tmax_in', 'gdd_in', 'sand_pct', 'silt_pct', 'clay_pct', 'slope', 'kv', 'awc','usage','treat2', 'tr', 'year', 'nrdname')
# reg_data_old <- readRDS("./Shared/Data/WaterAnalysis/data_w_LR_TB.rds") %>%
# .[,..var_ls_old]
tmmn_in and tmmx_in from regression anlaysis.
# /*===== preparation =====*/
# /*---- reg_data ----*/
gene_vars <- c("nrdname", "year")
weather_var <- c("pr_in", "pet_in", "gdd_in")
soil_var <- c("sandtotal_r", "claytotal_r", "silttotal_r", "ksat_r", "awc_r", "slope_r")
reg_long_dt <- reg_data %>%
.[,c(gene_vars, weather_var, soil_var), with=FALSE] %>%
melt(id.vars = gene_vars)
# /*=================================================*/
#' # Summary statistics
# /*=================================================*/
reg_data[,c("nrdname", weather_var, soil_var), with = FALSE] %>%
datasummary(
formula = as.formula( paste(paste0(c(weather_var, soil_var), collapse = "+"), "nrdname * (Mean + SD + MinMax)", sep="~")),
output = "gt", data = .
)
| Lower Republican | Tri-Basin | |||||
|---|---|---|---|---|---|---|
| Mean | SD | MinMax | Mean | SD | MinMax | |
| pr_in | 19.73 | 4.80 | [8.09, 30.96] | 19.87 | 4.68 | [8.13, 29.85] |
| pet_in | 39.58 | 4.52 | [33.39, 50.94] | 39.33 | 4.52 | [33.06, 50.7] |
| gdd_in | 1774.01 | 120.07 | [1528.15, 2044.3] | 1758.25 | 122.56 | [1504, 2007.93] |
| sandtotal_r | 15.08 | 9.45 | [5.01, 74.95] | 12.19 | 4.17 | [5.52, 71.88] |
| claytotal_r | 22.20 | 2.83 | [8.71, 35.23] | 23.28 | 2.25 | [9.48, 35.38] |
| silttotal_r | 62.76 | 7.35 | [16.35, 67.37] | 64.55 | 3.13 | [18.64, 67.42] |
| ksat_r | 8.55 | 4.73 | [4.18, 61.92] | 7.55 | 2.42 | [4.3, 56.41] |
| awc_r | 0.21 | 0.01 | [0.12, 0.22] | 0.21 | 0.01 | [0.13, 0.22] |
| slope_r | 3.62 | 2.78 | [0, 15.93] | 2.95 | 2.69 | [0.06, 21.01] |
pr_in(inch)tmmn_in and tmmx_in(celsius)pet_in(inch)sandtotal_r, claytotal_r, silttotal_r (%)ksat_r (um/s)awc_r (cm/cm)slope_r (%)# /*=================================================*/
#' # Quick Visualization
# /*=================================================*/
# /*===== Comparison: Treatment and Control groups =====*/
# /*---- weather ----*/
ggplot(reg_long_dt[variable %in% weather_var,]) +
geom_boxplot(aes(x=factor(year), y=value, fill=nrdname)) +
facet_grid(variable ~ . , scale='free') +
theme(
legend.position = "bottom",
title = element_text(face = "bold"),
strip.text.y = element_text(face = "bold")
) +
ggtitle("Annual Variations in Climate Variables")
# /*---- soil ----*/
ggplot(reg_long_dt[variable %in% soil_var,]) +
geom_histogram(aes(x=value))+
facet_grid(nrdname ~ variable ,scale='free') +
ggtitle("Histogram by Soil Variables") +
theme(
title = element_text(face = "bold"),
strip.text.y = element_text(face = "bold"),
strip.text.x = element_text(face = "bold")
)
Similar variations in the weather and soil variables between treatment and control group.
# /*=================================================*/
#' # Correlation Matrix
# /*=================================================*/
# /*---- weather ----*/
GGally::ggpairs(
data = reg_data[,..weather_var],
# lower = list(continuous = wrap("points", size=0.5)),
diag= list(continuous="barDiag"),
title = "Scatterplot matrix of the Weather Data"
)
gdd from the regression?pet (Reference grass evaportranspiration), too?
# # /*---- soil ----*/
ggpairs(
data = reg_data[,..soil_var],
# lower = list(continuous = wrap("points", color= "blue", size=0.5)),
diag=list(continuous="barDiag"),
title = "Scatterplot matrix of the Soil Data"
)